library(tidyverse)
library(tigris)
library(sf)
library(sp)
library(mapview)
library(usmap)
library(rjson)
library(leaflet)
library(stringdist)
library(measurements)
library(units)
library(pracma)
library(plotly)
library(ggplot2)
library(usmap)
library(rjson)
library(USAboundaries)
library(censusapi)

Sys.setenv(CENSUS_KEY="b88495f8315f45b2d100dee9ba8f4c489a7371c2")

options(
  tigris_class = "sf",
  tigris_use_cache = TRUE
)

`%notin%` <- Negate(`%in%`)

projection <- "+proj=utm +zone=10 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=ft +no_defs"
##add more data... from previous weeks
park_origins <- readRDS("smc_park_origins.rds")

smc_boundary <- readRDS("smc_boundary.rds") %>% st_set_crs("+proj=longlat +datum=WGS84")

park_geometry <- 
  readRDS('park_output_geometry_smc.rds') %>%
  st_as_sf() %>%
  st_transform(projection)

smc_population <-
  getCensus(
    name = "acs/acs5",
    vintage = 2018,
    region = "block group:*",
    regionin = "state:06+county:081",
    vars = c("group(B01003)")
  ) %>%
  mutate(
    origin_census_block_group =
      paste0(state,county,tract,block_group)
  ) %>% 
  rename(
    total_pop = "B01003_001E"
  ) %>%
  dplyr::select(total_pop, origin_census_block_group)

Is park accessibility a privilege? Does the location of where one lives impact the ability to enjoy outdoor green space? The goal of this analysis is to provide insight on the communities within San Mateo County that have the least visits to county parks. We hypothesize that people who live in high-density urban areas have a lower likelihood of direct park access, and vice versa. For this analysis, we use Safegraph Patterns Data.

Safegraph Data Collection Explanation

The following describes how the Safegraph data is processed:

Safegraph collects device visit information to “places of interest” (POI) like parks or grocery stores and includes some information about a device’s origin census block group. The visit counts in the Safegraph dataset fall into either one of these three cases:

Census Block Groups with Visits to SMC Parks

When looking at park visitors who live in specific census block groups (CBGs), it is not sufficient to consider the CBG visit numbers at face value. The population of each block group plays an important factor in weighing a community’s visits to parks. For example, if a CBG has 3 residents, and they have an average of 3 daily park visits, then we can infer that this community has very active park visitors (even though the 3 park visits may seem low).

The following map displays all of the census block groups we considered in this analysis (see the Safegraph Data Collection Explanation section above for why some CBGs are missing). The “Park Visits” option illustrates the average number of visits that each CBG has had to a SMC park since January 1, 2020. The “Park Visits per Population” displays a ratio, which provides more insight into how actively a community visits parks (a higher ratio = more active, a lower ratio = less active).

park_origin_mean <-
  park_origins %>%
  group_by(origin_census_block_group) %>%
  summarise(mean_visits = mean(mean_origin_visits))

visit_pop_combo <-
  smc_population %>%
  left_join(
    park_origin_mean,
    by = "origin_census_block_group"
  ) %>%
  na.omit() %>%
  mutate(
    visits_per_pop = round(mean_visits/total_pop,3)
  ) %>%
  filter(origin_census_block_group != "060819843001")

geometry <-
  park_origins %>%
  filter(origin_census_block_group %in% visit_pop_combo$origin_census_block_group)%>%
  dplyr::select(origin_census_block_group, geometry) %>%
  st_as_sf()

geometry <- geometry[!duplicated(geometry), ]

all_visitors <- 
  visit_pop_combo %>%
  left_join(
    geometry,
    by = "origin_census_block_group"
  ) %>%
  st_as_sf() %>%
  st_transform(projection)

###add toggle between park visits and visits per pop

blue_pal <- colorNumeric(
  palette = colorRamp(c("#6390ff", "#00288c"), interpolate="spline"),
  domain =
    all_visitors %>%
    pull(mean_visits) %>%
    unique()
)

teal_pal <- colorNumeric(
  palette = colorRamp(c("#02c0de", "#015a68"), interpolate="spline"),
  domain =
    all_visitors %>%
    pull(visits_per_pop) %>%
    unique()
)

all_visitors_map <- 
  leaflet() %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
 addPolygons(
    data = smc_boundary,
    fill = F,
    color = "gray",
    weight = 1,
    label = ~NAME
  ) %>%
  addPolygons(
    data = all_visitors %>%
      st_transform(4326),
    fillColor = ~blue_pal(mean_visits),
    color = "black",
    weight = 0.5,
    opacity = 0.5,
    fillOpacity = 0.75,
    group = "Park Visits",
    label = ~paste0(mean_visits," average daily visits to SMC parks since January 1, 2020"),
    highlightOptions = 
      highlightOptions(
        weight = 2.25,
        opacity = 1
      )) %>%
  addPolygons(
    data = all_visitors %>%
      st_transform(4326),
    fillColor = ~teal_pal(visits_per_pop),
    color = "black",
    weight = 0.5,
    opacity = 0.5,
    fillOpacity = 0.75,
    group = "Park Visits per Population",
    label = ~paste0(visits_per_pop," park visits per population"),
    highlightOptions = 
      highlightOptions(
        weight = 2.25,
        opacity = 1
      )
  ) %>%
  addLayersControl(
    baseGroups = c("Park Visits", "Park Visits per Population"),
    options = layersControlOptions(collapsed = FALSE)
  )

all_visitors_map
#mapview(all_visitors, zcol = "visits_per_pop")

Communities with the Lowest and Highest Park Visits

What communities visit parks more or less frequently than others? We will now directly compare the CBGs with the lowest and highest park visits per population ratios since January 1, 2020. The mean visit per population value is 0.068, and the standard deviation is +/-0.045. We considered a low visits per population ratio to be less than 0.022, and a high ratio to be more than 0.113 (one SD less/more than the mean, respectively).

lowest_visitors <-
  visit_pop_combo %>%
  filter(visits_per_pop < mean(visit_pop_combo$visits_per_pop)-sd(visit_pop_combo$visits_per_pop)) %>%
  left_join(
    geometry,
    by = "origin_census_block_group"
  ) %>%
  st_as_sf() %>%
  st_transform(projection)

teal_pal2 <- colorNumeric(
  palette = colorRamp(c("#015a68", "#015a68"), interpolate="spline"),
  domain =
    lowest_visitors %>%
    pull(visits_per_pop) %>%
    unique()
)

highest_visitors <-
  visit_pop_combo %>%
  filter(visits_per_pop > mean(visit_pop_combo$visits_per_pop)+sd(visit_pop_combo$visits_per_pop)) %>%
  left_join(
    geometry,
    by = "origin_census_block_group"
  ) %>%
  st_as_sf() %>%
  st_transform(projection)

orange_pal <- colorNumeric(
  palette = colorRamp(c("#d4a653", "#b87909"), interpolate="spline"),
  domain =
    highest_visitors %>%
    pull(visits_per_pop) %>%
    unique()
)

extremes_visitors_map <- 
  leaflet() %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
 addPolygons(
    data = smc_boundary,
    fill = F,
    color = "gray",
    weight = 1,
    label = ~NAME
  ) %>%
  addPolygons(
    data = lowest_visitors %>%
      st_transform(4326),
    fillColor = ~teal_pal2(visits_per_pop),
    color = "black",
    weight = 0.5,
    opacity = 0.5,
    fillOpacity = 0.75,
    group = "CBGs with Lowest Park Visit Ratio",
    label = ~paste0(visits_per_pop," park visits per population"),
    highlightOptions = 
      highlightOptions(
        weight = 2.25,
        opacity = 1
      )
  ) %>%
  addPolygons(
    data = highest_visitors %>%
      st_transform(4326),
    fillColor = ~orange_pal(visits_per_pop),
    color = "black",
    weight = 0.5,
    opacity = 0.5,
    fillOpacity = 0.75,
    group = "CBGs with Highest Park Visit Ratio",
    label = ~paste0(visits_per_pop," park visits per population"),
    highlightOptions = 
      highlightOptions(
        weight = 2.25,
        opacity = 1
      )
  ) %>%
  addLayersControl(
    baseGroups = c("CBGs with Lowest Park Visits", "CBGs with Highest Park Visit Ratio"),
    options = layersControlOptions(collapsed = FALSE)
  )

extremes_visitors_map
#mapview(lowest_visitors, zcol = "visits_per_pop")

The majority of the CBGs with the lowest park visits per population ratio appear to be bordering the bay lands. The CGBs with the highest park visits per population, on the other hand, appear to be in some of the less-urban areas.

Accessibility: Minimum Distance to a County Park from SMC Block Groups

In order to gauge park accessibility of people living in certain block groups, we need to bring distance into the equation.

First, we start by finding the centroids of the census block group shapes and the SMC county park shapes.

all_centroid <-
  all_visitors %>%
  st_centroid()

park_centroid <-
  park_geometry %>%
  st_centroid()

mapview(all_centroid,col.regions = "blue", layer.name = "CBG Centroids") + mapview(park_centroid, col.regions = "green", layer.name = "SMC Park Centroids")

Next, we find all of the distances from CBG centroids to SMC county park centroids, and take the minimum distance for each CBG.

combo_centroids <- data.frame(cbg = NA, park_name = NA, park_dist = NA)
# 
# for(i in 1:nrow(all_centroid)){
#   for(j in 1:nrow(park_centroid)){
#     temp_df <- data.frame(
#       cbg = all_centroid$origin_census_block_group[i],
#       park_name = park_centroid$location_name[j],
#       park_dist = st_distance(all_centroid$geometry[i],park_centroid$geometry[j])
#     )
#     
#     combo_centroids <-
#       combo_centroids %>%
#       rbind(temp_df)
#   }
#   
#   if(i%%5 == 0){print(i)}
#   
# }
# 
# combo_centroids_adjust <-
#   combo_centroids %>%
#   na.omit() %>%
#   mutate(
#     Distance = round(park_dist / 5280,2)
#     )

#saveRDS(combo_centroids_adjust,"smc_cbg_park_dist.rds")

combo_centroids_adjust <- readRDS("smc_cbg_park_dist.rds")

cbg_min_dist_park <-
  combo_centroids_adjust %>%
  group_by(cbg) %>%
  summarise(min_park_dist = min(Distance)) %>%
  left_join(
    geometry,
    by = c("cbg" = "origin_census_block_group")
  ) %>%
  st_as_sf() %>%
  st_transform(projection)



mapview(cbg_min_dist_park,zcol = "min_park_dist", layer.name = "Minimum Distance to a SMC Park (mi)")

The parks that have a distance greater than 5 miles are displayed isolated in the following map:

furthest_dist <-
  cbg_min_dist_park %>%
  filter(min_park_dist > 5)

mapview(furthest_dist,zcol = "min_park_dist", layer.name = "> 5 Mile Minimum Distance to SMC park")
combo_metric <-
  all_visitors %>%
  left_join(
    cbg_min_dist_park %>%
    st_set_geometry(NULL),
    by = c("origin_census_block_group" = "cbg")
  ) %>%
  st_set_geometry(NULL)

lim_mod <- lm(combo_metric$min_park_dist ~ combo_metric$visits_per_pop)

lim_scatter <-
  combo_metric %>% 
  ggplot(
    aes(
      x = min_park_dist,
      y = visits_per_pop
    )
  ) +
  geom_point() +
  geom_smooth(method=lm) +
  labs(
    x = "Minimum Distance to SMC Park",
    y = "Visits per CBG Population",
    title = "SMC Park Accessibility: Parks Visits per Population vs. Distance to Parks"
  )

lim_scatter

summary(lim_mod)
## 
## Call:
## lm(formula = combo_metric$min_park_dist ~ combo_metric$visits_per_pop)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9061 -1.0882 -0.2092  0.8444  5.6816 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   2.5099     0.1654  15.174   <2e-16 ***
## combo_metric$visits_per_pop   3.3704     1.9202   1.755   0.0803 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.542 on 274 degrees of freedom
##   (75 observations deleted due to missingness)
## Multiple R-squared:  0.01112,    Adjusted R-squared:  0.00751 
## F-statistic: 3.081 on 1 and 274 DF,  p-value: 0.08034